We have determined the set of genes that change expression upon perturbation of each target locus. Here, we plot the expression changes for all significant results.
## data
sce <- readRDS(paste0(dir, "results/03_sce_POC_Tcells.goodQual.calls.NORM.Rds"))
## gRNA annotation
gRNA_ann <- read.table(paste0(dir, "data/gRNA_library.tsv"), sep="\t", header = TRUE)
## DEGs
degs <- read.table(paste0(dir, "results/06_DEgenes.tsv"), header = TRUE)
# all DE testing results
res <- read.table(paste0(dir, "results/06_DEresults_targetLevel.tsv"), header = TRUE)
res$tier <- degs[match(res$pair_tgt, degs$pair_tgt),'tier']
res$distance <- degs[match(res$pair_tgt, degs$pair_tgt),'distance']
# define NT cells for plotting
nt_guides <- gRNA_ann[gRNA_ann$class=="NT",]$ID
cells <- colnames(sce[,sce$call == "unique"])
cells.neg <- apply(counts(altExp(sce, 'gRNA_calls'))[nt_guides, cells], 1, function(x) names(which(x)))
cells.neg <- unlist(cells.neg)
# use 5K
set.seed(749)
cells.neg <- sample(cells.neg, size=5e3)
First, we plot the expression of the genes that are expected to be downregulated, from positive control perturbations.
selected <- setdiff(unique(gRNA_ann[gRNA_ann$class=="TSS",]$target), c("IL12A_TSS", "TBX1_TSS")) # IL12A and TBX1 are not expressed
## plot every target
plots <- list()
for(target in selected){
# expected gene
gene <- unique(gRNA_ann[gRNA_ann$target==target,]$expected_DE_gene)
# DE results
hit <- res[res$target==target & res$gene_id==gene,]
# plot
plots[[target]] <- plot_gene_sc(sce = sce, cells.neg = cells.neg, hit = hit, thr=0.05)
}
ggarrange(plotlist = plots, ncol=2, nrow=17, align="hv")
selected <- unique(gRNA_ann[gRNA_ann$class=="LCR",]$target)
## plot every target
plots <- list()
for(target in selected){
# expected gene
gene <- unique(gRNA_ann[gRNA_ann$target==target,]$expected_DE_gene)
# DE results
hit <- res[res$target==target & res$gene_id==gene,]
# plot
plots[[target]] <- plot_gene_sc(sce = sce, cells.neg = cells.neg, hit = hit, thr=0.05)
}
ggarrange(plotlist = plots, ncol=2, nrow=2, align="hv")
selected <- setdiff(unique(gRNA_ann[gRNA_ann$class=="ENH",]$target), c("USP6NL_ENH", "TUBB2A_ENH")) # USP6NL and TUBB2A are not expressed
## plot every target
plots <- list()
for(target in selected){
# expected gene
gene <- unique(gRNA_ann[gRNA_ann$target==target,]$expected_DE_gene)
# DE results
hit <- res[res$target==target & res$gene_id==gene,]
# plot
plots[[target]] <- plot_gene_sc(sce = sce, cells.neg = cells.neg, hit = hit, thr=0.05)
}
ggarrange(plotlist = plots, ncol=2, nrow=13, align="hv")
Below are any other high- and medium-confidence hits, that are not the expected gene for intergenic perturbations.
## plot every target
selected <- degs[degs$class=="ENH" & !degs$expected & degs$tier!="low",]
plots <- list()
for(i in 1:nrow(selected)){
hit <- selected[i,]
plots[[i]] <- plot_gene_sc(sce = sce, cells.neg = cells.neg, hit = hit, thr=0.05)
}
ggarrange(plotlist = plots, ncol=2, nrow=21, align="hv")
## plot every target
selected <- degs[degs$class=="INTRON" & degs$tier!="low",]
plots <- list()
for(i in 1:nrow(selected)){
hit <- selected[i,]
plots[[i]] <- plot_gene_sc(sce = sce, cells.neg = cells.neg, hit = hit, thr=0.05)
}
ggarrange(plotlist = plots, ncol=2, nrow=12, align="hv")
## plot every target
selected <- degs[degs$class=="INTERGENIC" & degs$tier!="low",]
plots <- list()
for(i in 1:nrow(selected)){
hit <- selected[i,]
plots[[i]] <- plot_gene_sc(sce = sce, cells.neg = cells.neg, hit = hit, thr=0.05)
}
ggarrange(plotlist = plots, ncol=2, nrow=6, align="hv")
For intronic elements, we check directly the expression of the overlapping gene. LCA5L is not detected in at least 5% of cells and was not tested.
## plot every target
df <- data.frame(target = unique(gRNA_ann[gRNA_ann$class=="INTRON",]$target),
gene = unlist(lapply(strsplit(unique(gRNA_ann[gRNA_ann$class=="INTRON",]$target), "_"),'[[',1)))
df <- df[-which(df$target == "LCA5L_INTRON"),]
plots <- list()
for(i in 1:nrow(df)){
target <- df[i,1]
gene <- df[i,2]
hit <- res[res$target == target & res$gene_id == gene,]
plots[[i]] <- plot_gene_sc(sce = sce, cells.neg = cells.neg, hit = hit, thr=0.05)
}
ggarrange(plotlist = plots, ncol=2, nrow=5, align="hv")
We also plot the expression of the four closest genes (detected in at least 5% of the cells) for INTERGENIC targets.
## neighbourhood
gene_distance_target <- read.table(paste0(dir, "results/06_gene_target_distance.tsv"), header=TRUE)
## plot every target
df <- gene_distance_target[grep("INTERGENIC", gene_distance_target$target),]
plots <- list()
for(target in unique(gRNA_ann[gRNA_ann$class=="INTERGENIC",]$target)){
selected <- df[which(df$target==target & df$expressed),][1:4,]
for(i in 1:nrow(selected)){
hit <- res[res$target == target & res$gene_id == selected[i,'external_gene_name'],]
plots[[paste0(target,i)]] <- plot_gene_sc(sce = sce, cells.neg = cells.neg, hit = hit, thr=0.05)
}
}
ggarrange(plotlist = plots, ncol=2, nrow=6, align="hv")
sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
##
## Matrix products: default
## BLAS: /usr/lib64/libblas.so.3.4.2
## LAPACK: /hpc/apps/2018/R/v4.0.2.app/lib/R/lib/libRlapack.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] ggpubr_0.4.0 ggplot2_3.3.5
## [3] scales_1.1.1 scran_1.16.0
## [5] SingleCellExperiment_1.10.1 SummarizedExperiment_1.18.2
## [7] DelayedArray_0.14.1 matrixStats_0.60.1
## [9] Biobase_2.48.0 GenomicRanges_1.40.0
## [11] GenomeInfoDb_1.24.2 IRanges_2.22.2
## [13] S4Vectors_0.26.1 BiocGenerics_0.34.0
##
## loaded via a namespace (and not attached):
## [1] bitops_1.0-7 tools_4.0.2
## [3] backports_1.2.1 bslib_0.3.0
## [5] utf8_1.2.2 R6_2.5.1
## [7] irlba_2.3.3 vipor_0.4.5
## [9] DBI_1.1.1 colorspace_2.0-2
## [11] withr_2.4.2 tidyselect_1.1.1
## [13] gridExtra_2.3 curl_4.3.2
## [15] compiler_4.0.2 BiocNeighbors_1.6.0
## [17] labeling_0.4.2 sass_0.4.0
## [19] stringr_1.4.0 digest_0.6.27
## [21] foreign_0.8-81 rmarkdown_2.7
## [23] rio_0.5.26 XVector_0.28.0
## [25] scater_1.16.2 pkgconfig_2.0.3
## [27] htmltools_0.5.2 highr_0.8
## [29] fastmap_1.1.0 limma_3.44.3
## [31] readxl_1.3.1 rlang_0.4.11
## [33] DelayedMatrixStats_1.10.1 farver_2.1.0
## [35] jquerylib_0.1.4 generics_0.1.0
## [37] jsonlite_1.7.2 BiocParallel_1.22.0
## [39] zip_2.1.1 dplyr_1.0.7
## [41] car_3.0-10 RCurl_1.98-1.3
## [43] magrittr_2.0.1 BiocSingular_1.4.0
## [45] GenomeInfoDbData_1.2.3 Matrix_1.3-2
## [47] Rcpp_1.0.7 ggbeeswarm_0.6.0
## [49] munsell_0.5.0 fansi_0.5.0
## [51] abind_1.4-5 viridis_0.6.1
## [53] lifecycle_1.0.0 stringi_1.5.3
## [55] yaml_2.2.1 edgeR_3.30.3
## [57] carData_3.0-4 zlibbioc_1.34.0
## [59] grid_4.0.2 dqrng_0.3.0
## [61] forcats_0.5.1 crayon_1.4.1
## [63] lattice_0.20-41 cowplot_1.1.1
## [65] haven_2.3.1 hms_1.1.0
## [67] locfit_1.5-9.4 knitr_1.31
## [69] pillar_1.6.2 igraph_1.2.6
## [71] ggsignif_0.6.1 glue_1.4.2
## [73] evaluate_0.14 data.table_1.14.0
## [75] vctrs_0.3.8 cellranger_1.1.0
## [77] gtable_0.3.0 purrr_0.3.4
## [79] tidyr_1.1.3 assertthat_0.2.1
## [81] openxlsx_4.2.3 xfun_0.22
## [83] rsvd_1.0.5 broom_0.7.6
## [85] rstatix_0.7.0 viridisLite_0.4.0
## [87] tibble_3.1.4 beeswarm_0.4.0
## [89] statmod_1.4.36 ellipsis_0.3.2